#include <stdlib.h>
#include <assert.h>
#include <string.h>
+#include <pthread.h>
#include "blockstore.h"
#include "radix.h"
#define DEBUG
*/
+/*
+#define STAGED
+*/
+
#define ZERO 0LL
#define ONE 1LL
#define ONEMASK 0xffffffffffffffeLL
typedef u64 *radix_tree_node;
+
+/* Experimental radix cache. */
+
+static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
+static int rcache_count = 0;
+#define RCACHE_MAX 1024
+
+typedef struct rcache_st {
+ radix_tree_node *node;
+ u64 id;
+ struct rcache_st *hash_next;
+ struct rcache_st *cache_next;
+ struct rcache_st *cache_prev;
+} rcache_t;
+
+static rcache_t *rcache_head = NULL;
+static rcache_t *rcache_tail = NULL;
+
+#define RCHASH_SIZE 512ULL
+rcache_t *rcache[RCHASH_SIZE];
+#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
+
+void __rcache_init(void)
+{
+ int i;
+printf("rcache_init!\n");
+
+ for (i=0; i<RCHASH_SIZE; i++)
+ rcache[i] = NULL;
+}
+
+
+void rcache_write(u64 id, radix_tree_node *node)
+{
+ rcache_t *r, *tmp, **curs;
+
+ pthread_mutex_lock(&rcache_mutex);
+
+ /* Is it already in the cache? */
+ r = rcache[RCACHE_HASH(id)];
+
+ for (;;) {
+ if (r == NULL)
+ break;
+ if (r->id == id)
+ {
+ memcpy(r->node, node, BLOCK_SIZE);
+
+ /* bring to front. */
+ if (r != rcache_head) {
+
+ if (r == rcache_tail) {
+ if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+ rcache_tail->cache_next = NULL;
+ }
+
+ tmp = r->cache_next;
+ if (r->cache_next != NULL) r->cache_next->cache_prev
+ = r->cache_prev;
+ if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
+
+ r->cache_prev = NULL;
+ r->cache_next = rcache_head;
+ if (rcache_head != NULL) rcache_head->cache_prev = r;
+ rcache_head = r;
+ }
+
+//printf("Update (%Ld)\n", r->id);
+ goto done;
+ }
+ r = r->hash_next;
+ }
+
+ if ( rcache_count == RCACHE_MAX )
+ {
+ /* Remove an entry */
+
+ r = rcache_tail;
+ if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+ rcache_tail->cache_next = NULL;
+ freeblock(r->node);
+
+ curs = &rcache[RCACHE_HASH(r->id)];
+ while ((*curs) != r)
+ curs = &(*curs)->hash_next;
+ *curs = r->hash_next;
+//printf("Evict (%Ld)\n", r->id);
+
+ } else {
+
+ r = (rcache_t *)malloc(sizeof(rcache_t));
+ rcache_count++;
+ }
+
+ r->node = newblock();
+ memcpy(r->node, node, BLOCK_SIZE);
+ r->id = id;
+
+ r->hash_next = rcache[RCACHE_HASH(id)];
+ rcache[RCACHE_HASH(id)] = r;
+
+ r->cache_prev = NULL;
+ r->cache_next = rcache_head;
+ if (rcache_head != NULL) rcache_head->cache_prev = r;
+ rcache_head = r;
+ if (rcache_tail == NULL) rcache_tail = r;
+
+//printf("Added (%Ld, %p)\n", id, r->node);
+done:
+ pthread_mutex_unlock(&rcache_mutex);
+}
+
+radix_tree_node *rcache_read(u64 id)
+{
+ rcache_t *r, *tmp;
+ radix_tree_node *node = NULL;
+
+ pthread_mutex_lock(&rcache_mutex);
+
+ r = rcache[RCACHE_HASH(id)];
+
+ for (;;) {
+ if (r == NULL) {
+//printf("Miss (%Ld)\n", id);
+ goto done;
+ }
+ if (r->id == id) break;
+ r = r->hash_next;
+ }
+
+ /* bring to front. */
+ if (r != rcache_head)
+ {
+ if (r == rcache_tail) {
+ if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+ rcache_tail->cache_next = NULL;
+ }
+ tmp = r->cache_next;
+ if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
+ if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
+
+ r->cache_prev = NULL;
+ r->cache_next = rcache_head;
+ if (rcache_head != NULL) rcache_head->cache_prev = r;
+ rcache_head = r;
+ }
+
+ node = newblock();
+ memcpy(node, r->node, BLOCK_SIZE);
+
+//printf("Hit (%Ld, %p)\n", id, r->node);
+done:
+ pthread_mutex_unlock(&rcache_mutex);
+
+ return(node);
+}
+
+
+void *rc_readblock(u64 id)
+{
+ void *ret;
+
+ ret = (void *)rcache_read(id);
+
+ if (ret != NULL) return ret;
+
+ ret = readblock(id);
+
+ if (ret != NULL)
+ rcache_write(id, ret);
+
+ return(ret);
+}
+
+u64 rc_allocblock(void *block)
+{
+ u64 ret;
+
+ ret = allocblock(block);
+
+ if (ret != ZERO)
+ rcache_write(ret, block);
+
+ return(ret);
+}
+
+int rc_writeblock(u64 id, void *block)
+{
+ int ret;
+
+ ret = writeblock(id, block);
+ rcache_write(id, block);
+
+ return(ret);
+}
+
+
/*
* block device interface and other helper functions
* with these functions, block id is just a 63-bit number, with
*
* @return: value on success, zero on error
*/
+#ifndef STAGED
+
u64 lookup(int height, u64 root, u64 key) {
radix_tree_node node;
u64 mask = ONE;
return ZERO;
oldroot = root;
- node = (radix_tree_node) readblock(getid(root));
+ node = (radix_tree_node) rc_readblock(getid(root));
if (node == NULL)
return ZERO;
return ZERO;
}
+#else /* STAGED */
+
+
+/* non-recursive staged lookup, assume height is 35. */
+u64 lookup(int height, u64 root, u64 key) {
+ radix_tree_node node;
+ u64 mask = ONE;
+
+printf("lookup!\n");
+ assert(key >> 35 == 0);
+
+ /* the root block may be smaller to ensure all leaves are full */
+ height = 27;
+
+ /* now carve off equal sized chunks at each step */
+
+ /* ROOT: (LEVEL 0) KEYLEN=35*/
+ if (getid(root) == ZERO)
+ return ZERO;
+
+ node = (radix_tree_node) readblock(getid(root));
+ if (node == NULL)
+ return ZERO;
+
+ root = node[(key >> height) & RADIX_TREE_MAP_MASK];
+ mask &= root;
+ freeblock(node);
+
+ if (height == 0)
+ return ( root & ONEMASK ) | mask;
+
+ height -= RADIX_TREE_MAP_SHIFT; /* == 18 */
+
+ /* LEVEL 1: KEYLEN=26*/
+ if (getid(root) == ZERO)
+ return ZERO;
+
+ node = (radix_tree_node) readblock(getid(root));
+ if (node == NULL)
+ return ZERO;
+
+ root = node[(key >> height) & RADIX_TREE_MAP_MASK];
+ mask &= root;
+ freeblock(node);
+
+ if (height == 0)
+ return ( root & ONEMASK ) | mask;
+
+ height -= RADIX_TREE_MAP_SHIFT; /* == 9 */
+
+ /* LEVEL 2: KEYLEN=17*/
+ if (getid(root) == ZERO)
+ return ZERO;
+
+ node = (radix_tree_node) readblock(getid(root));
+ if (node == NULL)
+ return ZERO;
+
+ root = node[(key >> height) & RADIX_TREE_MAP_MASK];
+ mask &= root;
+ freeblock(node);
+
+ if (height == 0)
+ return ( root & ONEMASK ) | mask;
+
+ height -= RADIX_TREE_MAP_SHIFT; /* == 0 */
+
+ /* LEVEL 3: KEYLEN=8*/
+ if (getid(root) == ZERO)
+ return ZERO;
+
+ node = (radix_tree_node) readblock(getid(root));
+ if (node == NULL)
+ return ZERO;
+
+ root = node[(key >> height) & RADIX_TREE_MAP_MASK];
+ mask &= root;
+ freeblock(node);
+
+ // if (height == 0)
+ return ( root & ONEMASK ) | mask;
+
+}
+
+#endif
+
/*
* update: set a radix tree entry, doing copy-on-write as necessary
* @height: height in bits of the radix tree
*
* @returns: (possibly new) root id on success (with LSB=1), 0 on failure
*/
+
+#ifndef STAGED
+
+
u64 update(int height, u64 root, u64 key, u64 val) {
int offset;
u64 child;
if (root == ZERO) {
node = (radix_tree_node) newblock();
} else {
- node = (radix_tree_node) readblock(getid(root));
+ node = (radix_tree_node) rc_readblock(getid(root));
if (!iswritable(root)) {
/* need to clone this node */
/* new/cloned blocks need to be saved */
if (root == ZERO) {
/* mark this as an owned block */
- root = allocblock(node);
+ root = rc_allocblock(node);
if (root)
root = writable(root);
- } else if (writeblock(getid(root), node) < 0) {
+ } else if (rc_writeblock(getid(root), node) < 0) {
freeblock(node);
return ZERO;
}
return root;
}
+
+#else /* STAGED */
+
+/* When update is called, state->next points to the thing to call after
+ * update is finished. */
+
+struct cb_state_st;
+
+typedef struct {
+ /* public stuff */
+ u64 val;
+ u64 key;
+ u64 result;
+
+ /* internal state */
+ u64 root[4];
+ radix_tree_node node[4];
+ void (*next)(struct cb_state_st *);
+ int err;
+} radix_update_t;
+
+typedef struct cb_state_st{
+ void (*next)(struct cb_state_st *); /* Next continuation. */
+ union {
+ radix_update_t update;
+ } radix;
+} cb_state_t;
+
+void s_readblock(cb_state_t *state, u64 id, void **ret)
+{
+ *ret = readblock(id);
+ state->next(state);
+}
+
+void s_allocblock(cb_state_t *state, void *block, u64 *ret)
+{
+ *ret = allocblock(block);
+ state->next(state);
+}
+
+void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret)
+{
+ *ret = writeblock(id, block);
+ state->next(state);
+}
+
+void cb_done(cb_state_t *state)
+{
+ printf("*** done ***\n");
+}
+
+/* forward prototypes. */
+void up0(cb_state_t *state);
+void up1(cb_state_t *state);
+void up2(cb_state_t *state);
+void up3(cb_state_t *state);
+void up4(cb_state_t *state);
+void up5(cb_state_t *state);
+void up6(cb_state_t *state);
+void up7(cb_state_t *state);
+void up8(cb_state_t *state);
+void up9(cb_state_t *state);
+void up10(cb_state_t *state);
+void up11(cb_state_t *state);
+void up12(cb_state_t *state);
+
+u64 update(int height, u64 root, u64 key, u64 val)
+{
+ cb_state_t state;
+ radix_update_t *u = &state.radix.update;
+
+ u->val = val;
+ u->key = key;
+ u->root[0] = root;
+ u->root[1] = u->root[2] = u->root[3] = ZERO;
+ u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL;
+
+ /* take a copy of the higher-scoped next continuation. */
+ u->next = state->next;
+
+ /* update start state */
+ state->next = up0;
+
+ for (;;)
+ {
+ state->next(state);
+ if (state->next == NULL)
+ break;
+ }
+
+ return u->result;
+}
+
+/* c0:*/
+void up0(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ state->next = up1;
+ s_readblock(state, getid(u->root[0]), (void **)&(u->node[0]));
+}
+
+/* c1: */
+void up1(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK];
+ if (u->root[1] == ZERO) {
+ u->node[1] = (radix_tree_node) newblock();
+ /* goto next continuation (c2)*/ up2(state);return;
+ } else {
+ state->next = up2;
+ s_readblock(state, getid(u->root[1]), (void **)&(u->node[1]));
+ }
+}
+
+/* c2: */
+void up2(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) {
+ /* need to clone this node */
+ radix_tree_node oldnode = u->node[1];
+ u->node[1] = cloneblock(u->node[1]);
+ freeblock(oldnode);
+ u->root[1] = ZERO;
+ }
+ u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK];
+ if (u->root[2] == ZERO) {
+ u->node[2] = (radix_tree_node) newblock();
+ /* goto next continuation (c3)*/ up3(state);return;
+ } else {
+ state->next = up3;
+ s_readblock(state, getid(u->root[2]), (void **)&(u->node[2]));
+ }
+}
+
+/* c3: */
+void up3(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) {
+ /* need to clone this node */
+ radix_tree_node oldnode = u->node[2];
+ u->node[2] = cloneblock(u->node[2]);
+ freeblock(oldnode);
+ u->root[2] = ZERO;
+ }
+ u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK];
+ if (u->root[3] == ZERO) {
+ u->node[3] = (radix_tree_node) newblock();
+ /* goto next continuation (c4)*/ up4(state);return;
+ } else {
+ state->next = up4;
+ s_readblock(state, getid(u->root[3]), (void **)&(u->node[3]));
+ }
+}
+
+/* c4: */
+void up4(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) {
+ /* need to clone this node */
+ radix_tree_node oldnode = u->node[3];
+ u->node[3] = cloneblock(u->node[3]);
+ freeblock(oldnode);
+ u->root[3] = ZERO;
+ }
+
+ if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){
+ /* no change, so we already owned the child */
+ /* goto last continuation (c12) */ up12(state);return;
+ }
+
+ u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val;
+
+ /* new/cloned blocks need to be saved */
+ if (u->root[3] == ZERO) {
+ /* mark this as an owned block */
+ state->next = up5;
+ s_allocblock(state, u->node[3], &u->root[3]);
+ /* goto continuation (c5) */ return;
+ } else {
+ state->next = up6;
+ s_writeblock(state, getid(u->root[3]), u->node[3], &u->err);
+ /* goto continuation (c6) */ return;
+ }
+}
+
+/* c5: */
+void up5(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->root[3])
+ u->root[3] = writable(u->root[3]);
+ /* goto continuation (c6) */ up6(state);return;
+}
+
+/* c6: */
+void up6(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){
+ /* no change, so we already owned the child */
+ /* goto last continuation (c12) */ up12(state);return;
+ }
+
+ u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3];
+
+ /* new/cloned blocks need to be saved */
+ if (u->root[2] == ZERO) {
+ /* mark this as an owned block */
+ state->next = up7;
+ s_allocblock(state, u->node[2], &u->root[2]);
+ /* goto continuation (c7) */return;
+ } else {
+ state->next = up8;
+ s_writeblock(state, getid(u->root[2]), u->node[2], &u->err);
+ /* goto continuation (c8) */return;
+ }
+}
+
+/* c7: */
+void up7(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->root[2])
+ u->root[2] = writable(u->root[2]);
+ /* goto continuation (c8) */ up8(state);return;
+}
+
+/* c8: */
+void up8(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){
+ /* no change, so we already owned the child */
+ /* goto last continuation (c12) */ up12(state);return;
+ }
+
+ u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2];
+
+ /* new/cloned blocks need to be saved */
+ if (u->root[1] == ZERO) {
+ /* mark this as an owned block */
+ state->next = up9;
+ s_allocblock(state, u->node[1], &u->root[1]);
+ /* goto continuation (c9) */return;
+ } else {
+ state->next = up10;
+ s_writeblock(state, getid(u->root[1]), u->node[1], &u->err);
+ /* goto continuation (c10) */return;
+ }
+}
+
+/* c9: */
+void up9(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->root[1])
+ u->root[1] = writable(u->root[1]);
+ /* goto continuation (c10) */ up10(state);return;
+}
+
+/* c10: */
+void up10(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){
+ /* no change, so we already owned the child */
+ /* goto last continuation (c12) */ up12(state);return;
+ }
+
+ u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1];
+
+ /* new/cloned blocks need to be saved */
+ if (u->root[0] == ZERO) {
+ /* mark this as an owned block */
+ state->next = up11;
+ s_allocblock(state, u->node[0], &u->root[0]);
+ /* goto continuation (c11) */ return;
+ } else {
+ state->next = up10;
+ s_writeblock(state, getid(u->root[0]), u->node[0], &u->err);
+ /* goto continuation (c12) */ return;
+ }
+}
+
+/* c11: */
+void up11(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ if (u->root[0])
+ u->root[0] = writable(u->root[0]);
+ /* goto continuation (c12) */ up12(state);return;
+}
+
+/* c12: */
+void up12(cb_state_t *state) {
+ radix_update_t *u = &state->radix.update;
+
+ int i;
+ for (i=0;i<4;i++)
+ if(u->node[i] != NULL) freeblock(u->node[i]);
+
+ u->result = u->root[0];
+ state->next = u->next;
+
+ state->next(state);return;
+}
+
+#endif
+
+
/**
* snapshot: create a snapshot
* @root: old root node
u64 snapshot(u64 root) {
radix_tree_node node, newnode;
- if ((node = readblock(getid(root))) == NULL)
+ if ((node = rc_readblock(getid(root))) == NULL)
return ZERO;
newnode = cloneblock(node);
if (newnode == NULL)
return ZERO;
- root = allocblock(newnode);
+ root = rc_allocblock(newnode);
freeblock(newnode);
if (root == ZERO)